In [1]:
import pandas as pd
In [6]:
# Load the direct-marketing dataset; the path is relative to the notebook's
# working directory.
df = pd.read_csv('Datasets/direct_marketing.csv')
In [7]:
# A bare expression as the last line renders the frame; the display is
# truncated for large frames (64000 rows x 13 columns here).
df
Out[7]:
recency
history_segment
history
mens
womens
zip_code
newbie
channel
segment
visit
conversion
spend
DM_category
0
10
2) $100 - $200
142.44
1
0
Surburban
0
Phone
Womens E-Mail
0
0
0.0
4
1
6
3) $200 - $350
329.08
1
1
Rural
1
Web
No E-Mail
0
0
0.0
11
2
7
2) $100 - $200
180.65
0
1
Surburban
1
Web
Womens E-Mail
0
0
0.0
1
3
9
5) $500 - $750
675.83
1
0
Rural
1
Web
Mens E-Mail
0
0
0.0
2
4
2
1) $0 - $100
45.34
1
0
Urban
0
Web
Womens E-Mail
0
0
0.0
4
5
6
2) $100 - $200
134.83
0
1
Surburban
0
Phone
Womens E-Mail
1
0
0.0
1
6
9
3) $200 - $350
280.20
1
0
Surburban
1
Phone
Womens E-Mail
0
0
0.0
4
7
9
1) $0 - $100
46.42
0
1
Urban
0
Phone
Womens E-Mail
0
0
0.0
1
8
9
5) $500 - $750
675.07
1
1
Rural
1
Phone
Mens E-Mail
0
0
0.0
5
9
10
1) $0 - $100
32.84
0
1
Urban
1
Web
Womens E-Mail
0
0
0.0
1
10
7
5) $500 - $750
548.91
0
1
Urban
1
Phone
Womens E-Mail
1
0
0.0
1
11
1
3) $200 - $350
211.45
0
1
Urban
1
Phone
Womens E-Mail
0
0
0.0
1
12
5
5) $500 - $750
642.90
0
1
Surburban
1
Multichannel
Womens E-Mail
0
0
0.0
1
13
2
2) $100 - $200
101.64
0
1
Urban
0
Web
Mens E-Mail
1
0
0.0
3
14
4
3) $200 - $350
241.42
0
1
Rural
1
Multichannel
No E-Mail
0
0
0.0
5
15
3
1) $0 - $100
58.13
1
0
Urban
1
Web
No E-Mail
1
0
0.0
6
16
5
1) $0 - $100
29.99
1
0
Surburban
0
Phone
Mens E-Mail
0
0
0.0
2
17
9
2) $100 - $200
112.35
1
0
Rural
0
Web
Mens E-Mail
0
0
0.0
2
18
11
3) $200 - $350
219.04
1
1
Surburban
0
Phone
Mens E-Mail
0
0
0.0
5
19
5
6) $750 - $1,000
828.42
1
0
Surburban
1
Multichannel
Mens E-Mail
0
0
0.0
2
20
9
1) $0 - $100
29.99
0
1
Surburban
1
Phone
No E-Mail
0
0
0.0
5
21
11
2) $100 - $200
182.32
1
0
Surburban
0
Phone
Mens E-Mail
0
0
0.0
2
22
2
2) $100 - $200
118.40
1
0
Surburban
0
Web
Mens E-Mail
1
0
0.0
2
23
2
1) $0 - $100
29.99
0
1
Urban
1
Phone
No E-Mail
0
0
0.0
5
24
4
1) $0 - $100
78.24
1
0
Surburban
0
Web
No E-Mail
0
0
0.0
6
25
6
2) $100 - $200
139.87
0
1
Rural
1
Web
Mens E-Mail
0
0
0.0
3
26
12
2) $100 - $200
115.39
0
1
Surburban
0
Phone
Mens E-Mail
1
0
0.0
3
27
6
2) $100 - $200
162.98
0
1
Surburban
0
Web
Mens E-Mail
0
0
0.0
3
28
7
4) $350 - $500
435.73
0
1
Urban
1
Web
No E-Mail
0
0
0.0
5
29
2
3) $200 - $350
203.35
1
0
Rural
0
Web
No E-Mail
0
0
0.0
6
...
...
...
...
...
...
...
...
...
...
...
...
...
...
63970
4
2) $100 - $200
191.15
0
1
Surburban
1
Web
Womens E-Mail
0
0
0.0
1
63971
5
5) $500 - $750
549.87
0
1
Surburban
1
Phone
Womens E-Mail
0
0
0.0
1
63972
3
5) $500 - $750
554.97
0
1
Surburban
1
Web
No E-Mail
0
0
0.0
5
63973
8
4) $350 - $500
471.80
1
0
Surburban
1
Multichannel
Womens E-Mail
0
0
0.0
4
63974
8
1) $0 - $100
73.65
0
1
Surburban
0
Phone
Mens E-Mail
0
0
0.0
3
63975
10
6) $750 - $1,000
883.92
1
0
Urban
1
Phone
No E-Mail
0
0
0.0
6
63976
1
5) $500 - $750
710.72
1
1
Urban
1
Phone
No E-Mail
0
0
0.0
11
63977
8
1) $0 - $100
29.99
0
1
Surburban
0
Web
No E-Mail
1
0
0.0
5
63978
10
2) $100 - $200
102.01
0
1
Urban
1
Phone
Mens E-Mail
0
0
0.0
3
63979
10
2) $100 - $200
168.21
0
1
Surburban
0
Phone
No E-Mail
0
0
0.0
5
63980
3
4) $350 - $500
487.10
0
1
Surburban
1
Phone
No E-Mail
0
0
0.0
5
63981
4
2) $100 - $200
125.53
0
1
Rural
1
Phone
No E-Mail
0
0
0.0
5
63982
5
1) $0 - $100
29.99
1
0
Urban
1
Phone
Mens E-Mail
0
0
0.0
2
63983
2
1) $0 - $100
83.03
0
1
Urban
0
Phone
No E-Mail
0
0
0.0
5
63984
2
3) $200 - $350
209.51
0
1
Urban
1
Web
Womens E-Mail
0
0
0.0
1
63985
9
1) $0 - $100
29.99
1
0
Urban
0
Phone
Mens E-Mail
1
0
0.0
2
63986
9
1) $0 - $100
35.26
0
1
Urban
1
Web
Womens E-Mail
0
0
0.0
1
63987
1
1) $0 - $100
79.70
1
0
Surburban
1
Web
No E-Mail
0
0
0.0
6
63988
6
1) $0 - $100
32.98
1
0
Surburban
0
Web
Mens E-Mail
0
0
0.0
2
63989
10
3) $200 - $350
304.30
1
1
Surburban
0
Web
Womens E-Mail
1
0
0.0
5
63990
6
1) $0 - $100
80.02
0
1
Surburban
0
Phone
No E-Mail
0
0
0.0
5
63991
1
3) $200 - $350
306.10
1
0
Surburban
1
Phone
Womens E-Mail
0
0
0.0
4
63992
1
5) $500 - $750
519.69
1
1
Urban
1
Phone
Mens E-Mail
0
0
0.0
5
63993
4
4) $350 - $500
374.07
0
1
Surburban
0
Phone
Womens E-Mail
0
0
0.0
1
63994
7
1) $0 - $100
86.46
0
1
Urban
0
Web
Mens E-Mail
0
0
0.0
3
63995
10
2) $100 - $200
105.54
1
0
Urban
0
Web
Mens E-Mail
0
0
0.0
2
63996
5
1) $0 - $100
38.91
0
1
Urban
1
Phone
Mens E-Mail
0
0
0.0
3
63997
6
1) $0 - $100
29.99
1
0
Urban
1
Phone
Mens E-Mail
0
0
0.0
2
63998
1
5) $500 - $750
552.94
1
0
Surburban
1
Multichannel
Womens E-Mail
0
0
0.0
4
63999
1
4) $350 - $500
472.82
0
1
Surburban
0
Web
Mens E-Mail
0
0
0.0
3
64000 rows × 13 columns
In [8]:
# Attribute-style access to the `recency` column; the result is a 1-D Series.
df.recency
Out[8]:
0 10
1 6
2 7
3 9
4 2
5 6
6 9
7 9
8 9
9 10
10 7
11 1
12 5
13 2
14 4
15 3
16 5
17 9
18 11
19 5
20 9
21 11
22 2
23 2
24 4
25 6
26 12
27 6
28 7
29 2
..
63970 4
63971 5
63972 3
63973 8
63974 8
63975 10
63976 1
63977 8
63978 10
63979 10
63980 3
63981 4
63982 5
63983 2
63984 2
63985 9
63986 9
63987 1
63988 6
63989 10
63990 6
63991 1
63992 1
63993 4
63994 7
63995 10
63996 5
63997 6
63998 1
63999 1
Name: recency, dtype: int64
In [9]:
# Bracket access with a single column label; also yields a Series.
df['recency']
Out[9]:
0 10
1 6
2 7
3 9
4 2
5 6
6 9
7 9
8 9
9 10
10 7
11 1
12 5
13 2
14 4
15 3
16 5
17 9
18 11
19 5
20 9
21 11
22 2
23 2
24 4
25 6
26 12
27 6
28 7
29 2
..
63970 4
63971 5
63972 3
63973 8
63974 8
63975 10
63976 1
63977 8
63978 10
63979 10
63980 3
63981 4
63982 5
63983 2
63984 2
63985 9
63986 9
63987 1
63988 6
63989 10
63990 6
63991 1
63992 1
63993 4
63994 7
63995 10
63996 5
63997 6
63998 1
63999 1
Name: recency, dtype: int64
In [10]:
# Label-based indexer: all rows (`:`), the column labelled 'recency'.
df.loc[:, 'recency']
Out[10]:
0 10
1 6
2 7
3 9
4 2
5 6
6 9
7 9
8 9
9 10
10 7
11 1
12 5
13 2
14 4
15 3
16 5
17 9
18 11
19 5
20 9
21 11
22 2
23 2
24 4
25 6
26 12
27 6
28 7
29 2
..
63970 4
63971 5
63972 3
63973 8
63974 8
63975 10
63976 1
63977 8
63978 10
63979 10
63980 3
63981 4
63982 5
63983 2
63984 2
63985 9
63986 9
63987 1
63988 6
63989 10
63990 6
63991 1
63992 1
63993 4
63994 7
63995 10
63996 5
63997 6
63998 1
63999 1
Name: recency, dtype: int64
In [11]:
# Position-based indexer: all rows (`:`), the column at position 0 (`recency`).
df.iloc[:, 0]
Out[11]:
0 10
1 6
2 7
3 9
4 2
5 6
6 9
7 9
8 9
9 10
10 7
11 1
12 5
13 2
14 4
15 3
16 5
17 9
18 11
19 5
20 9
21 11
22 2
23 2
24 4
25 6
26 12
27 6
28 7
29 2
..
63970 4
63971 5
63972 3
63973 8
63974 8
63975 10
63976 1
63977 8
63978 10
63979 10
63980 3
63981 4
63982 5
63983 2
63984 2
63985 9
63986 9
63987 1
63988 6
63989 10
63990 6
63991 1
63992 1
63993 4
63994 7
63995 10
63996 5
63997 6
63998 1
63999 1
Name: recency, dtype: int64
In [13]:
# `.ix` was deprecated in pandas 0.20 and removed in 1.0, so this cell raises
# AttributeError on current versions. The column labels are strings, so the
# integer 0 was resolved positionally; `.iloc` is the explicit positional
# replacement and returns the same first column (`recency`).
df.iloc[:, 0]
Out[13]:
0 10
1 6
2 7
3 9
4 2
5 6
6 9
7 9
8 9
9 10
10 7
11 1
12 5
13 2
14 4
15 3
16 5
17 9
18 11
19 5
20 9
21 11
22 2
23 2
24 4
25 6
26 12
27 6
28 7
29 2
..
63970 4
63971 5
63972 3
63973 8
63974 8
63975 10
63976 1
63977 8
63978 10
63979 10
63980 3
63981 4
63982 5
63983 2
63984 2
63985 9
63986 9
63987 1
63988 6
63989 10
63990 6
63991 1
63992 1
63993 4
63994 7
63995 10
63996 5
63997 6
63998 1
63999 1
Name: recency, dtype: int64
In [14]:
# Passing a *list* of labels keeps the result two-dimensional: a one-column
# DataFrame rather than a Series.
df[['recency']]
Out[14]:
recency
0
10
1
6
2
7
3
9
4
2
5
6
6
9
7
9
8
9
9
10
10
7
11
1
12
5
13
2
14
4
15
3
16
5
17
9
18
11
19
5
20
9
21
11
22
2
23
2
24
4
25
6
26
12
27
6
28
7
29
2
...
...
63970
4
63971
5
63972
3
63973
8
63974
8
63975
10
63976
1
63977
8
63978
10
63979
10
63980
3
63981
4
63982
5
63983
2
63984
2
63985
9
63986
9
63987
1
63988
6
63989
10
63990
6
63991
1
63992
1
63993
4
63994
7
63995
10
63996
5
63997
6
63998
1
63999
1
64000 rows × 1 columns
In [15]:
# Label-based equivalent: a list of column labels yields a DataFrame.
df.loc[:, ['recency']]
Out[15]:
recency
0
10
1
6
2
7
3
9
4
2
5
6
6
9
7
9
8
9
9
10
10
7
11
1
12
5
13
2
14
4
15
3
16
5
17
9
18
11
19
5
20
9
21
11
22
2
23
2
24
4
25
6
26
12
27
6
28
7
29
2
...
...
63970
4
63971
5
63972
3
63973
8
63974
8
63975
10
63976
1
63977
8
63978
10
63979
10
63980
3
63981
4
63982
5
63983
2
63984
2
63985
9
63986
9
63987
1
63988
6
63989
10
63990
6
63991
1
63992
1
63993
4
63994
7
63995
10
63996
5
63997
6
63998
1
63999
1
64000 rows × 1 columns
In [17]:
# Position-based selection of several columns: positions 0 and 1
# (`recency` and `history_segment`) for every row.
df.iloc[:, [0,1]]
Out[17]:
recency
history_segment
0
10
2) $100 - $200
1
6
3) $200 - $350
2
7
2) $100 - $200
3
9
5) $500 - $750
4
2
1) $0 - $100
5
6
2) $100 - $200
6
9
3) $200 - $350
7
9
1) $0 - $100
8
9
5) $500 - $750
9
10
1) $0 - $100
10
7
5) $500 - $750
11
1
3) $200 - $350
12
5
5) $500 - $750
13
2
2) $100 - $200
14
4
3) $200 - $350
15
3
1) $0 - $100
16
5
1) $0 - $100
17
9
2) $100 - $200
18
11
3) $200 - $350
19
5
6) $750 - $1,000
20
9
1) $0 - $100
21
11
2) $100 - $200
22
2
2) $100 - $200
23
2
1) $0 - $100
24
4
1) $0 - $100
25
6
2) $100 - $200
26
12
2) $100 - $200
27
6
2) $100 - $200
28
7
4) $350 - $500
29
2
3) $200 - $350
...
...
...
63970
4
2) $100 - $200
63971
5
5) $500 - $750
63972
3
5) $500 - $750
63973
8
4) $350 - $500
63974
8
1) $0 - $100
63975
10
6) $750 - $1,000
63976
1
5) $500 - $750
63977
8
1) $0 - $100
63978
10
2) $100 - $200
63979
10
2) $100 - $200
63980
3
4) $350 - $500
63981
4
2) $100 - $200
63982
5
1) $0 - $100
63983
2
1) $0 - $100
63984
2
3) $200 - $350
63985
9
1) $0 - $100
63986
9
1) $0 - $100
63987
1
1) $0 - $100
63988
6
1) $0 - $100
63989
10
3) $200 - $350
63990
6
1) $0 - $100
63991
1
3) $200 - $350
63992
1
5) $500 - $750
63993
4
4) $350 - $500
63994
7
1) $0 - $100
63995
10
2) $100 - $200
63996
5
1) $0 - $100
63997
6
1) $0 - $100
63998
1
5) $500 - $750
63999
1
4) $350 - $500
64000 rows × 2 columns
In [18]:
# Positional row slice: the stop position (2) is excluded, so rows 0 and 1
# are returned with all columns.
df.iloc[0:2, :]
Out[18]:
recency
history_segment
history
mens
womens
zip_code
newbie
channel
segment
visit
conversion
spend
DM_category
0
10
2) $100 - $200
142.44
1
0
Surburban
0
Phone
Womens E-Mail
0
0
0.0
4
1
6
3) $200 - $350
329.08
1
1
Rural
1
Web
No E-Mail
0
0
0.0
11
In [20]:
# Label-based row slice: unlike `.iloc`, the stop label (1) is included, so
# this also returns rows 0 and 1.
df.loc[0:1, :]
Out[20]:
recency
history_segment
history
mens
womens
zip_code
newbie
channel
segment
visit
conversion
spend
DM_category
0
10
2) $100 - $200
142.44
1
0
Surburban
0
Phone
Womens E-Mail
0
0
0.0
4
1
6
3) $200 - $350
329.08
1
1
Rural
1
Web
No E-Mail
0
0
0.0
11
In [19]:
# A bare slice inside [] selects rows rather than columns; here rows 0 and 1.
df[0:2]
Out[19]:
recency
history_segment
history
mens
womens
zip_code
newbie
channel
segment
visit
conversion
spend
DM_category
0
10
2) $100 - $200
142.44
1
0
Surburban
0
Phone
Womens E-Mail
0
0
0.0
4
1
6
3) $200 - $350
329.08
1
1
Rural
1
Web
No E-Mail
0
0
0.0
11
In [21]:
# Element-wise comparison: produces a boolean Series (one True/False per row).
df.recency < 7
Out[21]:
0 False
1 True
2 False
3 False
4 True
5 True
6 False
7 False
8 False
9 False
10 False
11 True
12 True
13 True
14 True
15 True
16 True
17 False
18 False
19 True
20 False
21 False
22 True
23 True
24 True
25 True
26 False
27 True
28 False
29 True
...
63970 True
63971 True
63972 True
63973 False
63974 False
63975 False
63976 True
63977 False
63978 False
63979 False
63980 True
63981 True
63982 True
63983 True
63984 True
63985 False
63986 False
63987 True
63988 True
63989 False
63990 True
63991 True
63992 True
63993 True
63994 False
63995 False
63996 True
63997 True
63998 True
63999 True
Name: recency, dtype: bool
In [22]:
# Boolean-mask indexing: keeps only the rows where the mask is True
# (36585 of the 64000 rows).
df[ df.recency < 7 ]
Out[22]:
recency
history_segment
history
mens
womens
zip_code
newbie
channel
segment
visit
conversion
spend
DM_category
1
6
3) $200 - $350
329.08
1
1
Rural
1
Web
No E-Mail
0
0
0.0
11
4
2
1) $0 - $100
45.34
1
0
Urban
0
Web
Womens E-Mail
0
0
0.0
4
5
6
2) $100 - $200
134.83
0
1
Surburban
0
Phone
Womens E-Mail
1
0
0.0
1
11
1
3) $200 - $350
211.45
0
1
Urban
1
Phone
Womens E-Mail
0
0
0.0
1
12
5
5) $500 - $750
642.90
0
1
Surburban
1
Multichannel
Womens E-Mail
0
0
0.0
1
13
2
2) $100 - $200
101.64
0
1
Urban
0
Web
Mens E-Mail
1
0
0.0
3
14
4
3) $200 - $350
241.42
0
1
Rural
1
Multichannel
No E-Mail
0
0
0.0
5
15
3
1) $0 - $100
58.13
1
0
Urban
1
Web
No E-Mail
1
0
0.0
6
16
5
1) $0 - $100
29.99
1
0
Surburban
0
Phone
Mens E-Mail
0
0
0.0
2
19
5
6) $750 - $1,000
828.42
1
0
Surburban
1
Multichannel
Mens E-Mail
0
0
0.0
2
22
2
2) $100 - $200
118.40
1
0
Surburban
0
Web
Mens E-Mail
1
0
0.0
2
23
2
1) $0 - $100
29.99
0
1
Urban
1
Phone
No E-Mail
0
0
0.0
5
24
4
1) $0 - $100
78.24
1
0
Surburban
0
Web
No E-Mail
0
0
0.0
6
25
6
2) $100 - $200
139.87
0
1
Rural
1
Web
Mens E-Mail
0
0
0.0
3
27
6
2) $100 - $200
162.98
0
1
Surburban
0
Web
Mens E-Mail
0
0
0.0
3
29
2
3) $200 - $350
203.35
1
0
Rural
0
Web
No E-Mail
0
0
0.0
6
30
2
3) $200 - $350
237.53
0
1
Surburban
0
Phone
Womens E-Mail
0
0
0.0
1
32
6
2) $100 - $200
128.01
0
1
Urban
0
Web
Mens E-Mail
0
0
0.0
3
34
3
1) $0 - $100
29.99
1
0
Rural
0
Web
Womens E-Mail
0
0
0.0
4
35
4
3) $200 - $350
218.72
0
1
Urban
0
Multichannel
Womens E-Mail
0
0
0.0
1
36
1
5) $500 - $750
514.52
0
1
Surburban
1
Web
Mens E-Mail
0
0
0.0
3
37
4
6) $750 - $1,000
766.47
1
1
Urban
1
Multichannel
Mens E-Mail
0
0
0.0
5
41
3
1) $0 - $100
99.23
1
0
Rural
0
Web
Mens E-Mail
1
0
0.0
2
43
2
4) $350 - $500
492.02
1
0
Surburban
0
Phone
No E-Mail
0
0
0.0
6
44
1
1) $0 - $100
48.32
0
1
Urban
0
Web
No E-Mail
0
0
0.0
5
46
2
4) $350 - $500
391.33
1
0
Surburban
0
Web
No E-Mail
0
0
0.0
6
47
1
5) $500 - $750
729.70
1
1
Surburban
1
Web
Mens E-Mail
0
0
0.0
5
48
3
2) $100 - $200
134.59
1
0
Urban
1
Phone
Womens E-Mail
1
0
0.0
4
50
3
3) $200 - $350
203.30
0
1
Surburban
0
Web
No E-Mail
0
0
0.0
5
55
6
1) $0 - $100
42.66
1
0
Surburban
0
Web
No E-Mail
0
0
0.0
6
...
...
...
...
...
...
...
...
...
...
...
...
...
...
63949
5
1) $0 - $100
86.79
1
0
Rural
0
Phone
No E-Mail
0
0
0.0
6
63950
1
1) $0 - $100
45.67
0
1
Surburban
1
Web
Mens E-Mail
0
0
0.0
3
63953
5
2) $100 - $200
166.24
0
1
Urban
0
Phone
No E-Mail
0
0
0.0
5
63954
2
1) $0 - $100
93.97
1
0
Urban
0
Web
No E-Mail
1
0
0.0
6
63955
1
1) $0 - $100
29.99
1
0
Surburban
0
Phone
Mens E-Mail
0
0
0.0
2
63960
1
3) $200 - $350
221.89
0
1
Surburban
1
Multichannel
No E-Mail
0
0
0.0
5
63961
4
3) $200 - $350
337.36
1
0
Urban
0
Web
Mens E-Mail
1
0
0.0
2
63964
2
6) $750 - $1,000
772.99
1
1
Surburban
1
Web
Mens E-Mail
0
0
0.0
5
63966
4
2) $100 - $200
170.03
1
0
Surburban
0
Web
Womens E-Mail
0
0
0.0
4
63967
5
1) $0 - $100
77.73
0
1
Urban
1
Phone
No E-Mail
0
0
0.0
5
63969
3
1) $0 - $100
67.78
0
1
Surburban
0
Web
Womens E-Mail
0
0
0.0
1
63970
4
2) $100 - $200
191.15
0
1
Surburban
1
Web
Womens E-Mail
0
0
0.0
1
63971
5
5) $500 - $750
549.87
0
1
Surburban
1
Phone
Womens E-Mail
0
0
0.0
1
63972
3
5) $500 - $750
554.97
0
1
Surburban
1
Web
No E-Mail
0
0
0.0
5
63976
1
5) $500 - $750
710.72
1
1
Urban
1
Phone
No E-Mail
0
0
0.0
11
63980
3
4) $350 - $500
487.10
0
1
Surburban
1
Phone
No E-Mail
0
0
0.0
5
63981
4
2) $100 - $200
125.53
0
1
Rural
1
Phone
No E-Mail
0
0
0.0
5
63982
5
1) $0 - $100
29.99
1
0
Urban
1
Phone
Mens E-Mail
0
0
0.0
2
63983
2
1) $0 - $100
83.03
0
1
Urban
0
Phone
No E-Mail
0
0
0.0
5
63984
2
3) $200 - $350
209.51
0
1
Urban
1
Web
Womens E-Mail
0
0
0.0
1
63987
1
1) $0 - $100
79.70
1
0
Surburban
1
Web
No E-Mail
0
0
0.0
6
63988
6
1) $0 - $100
32.98
1
0
Surburban
0
Web
Mens E-Mail
0
0
0.0
2
63990
6
1) $0 - $100
80.02
0
1
Surburban
0
Phone
No E-Mail
0
0
0.0
5
63991
1
3) $200 - $350
306.10
1
0
Surburban
1
Phone
Womens E-Mail
0
0
0.0
4
63992
1
5) $500 - $750
519.69
1
1
Urban
1
Phone
Mens E-Mail
0
0
0.0
5
63993
4
4) $350 - $500
374.07
0
1
Surburban
0
Phone
Womens E-Mail
0
0
0.0
1
63996
5
1) $0 - $100
38.91
0
1
Urban
1
Phone
Mens E-Mail
0
0
0.0
3
63997
6
1) $0 - $100
29.99
1
0
Urban
1
Phone
Mens E-Mail
0
0
0.0
2
63998
1
5) $500 - $750
552.94
1
0
Surburban
1
Multichannel
Womens E-Mail
0
0
0.0
4
63999
1
4) $350 - $500
472.82
0
1
Surburban
0
Web
Mens E-Mail
0
0
0.0
3
36585 rows × 13 columns
In [23]:
# Two masks combined with element-wise `&`. Each comparison must be
# parenthesised because `&` binds more tightly than `<` and `==`.
df[ (df.recency < 7) & (df.newbie == 0) ]
Out[23]:
recency
history_segment
history
mens
womens
zip_code
newbie
channel
segment
visit
conversion
spend
DM_category
4
2
1) $0 - $100
45.34
1
0
Urban
0
Web
Womens E-Mail
0
0
0.0
4
5
6
2) $100 - $200
134.83
0
1
Surburban
0
Phone
Womens E-Mail
1
0
0.0
1
13
2
2) $100 - $200
101.64
0
1
Urban
0
Web
Mens E-Mail
1
0
0.0
3
16
5
1) $0 - $100
29.99
1
0
Surburban
0
Phone
Mens E-Mail
0
0
0.0
2
22
2
2) $100 - $200
118.40
1
0
Surburban
0
Web
Mens E-Mail
1
0
0.0
2
24
4
1) $0 - $100
78.24
1
0
Surburban
0
Web
No E-Mail
0
0
0.0
6
27
6
2) $100 - $200
162.98
0
1
Surburban
0
Web
Mens E-Mail
0
0
0.0
3
29
2
3) $200 - $350
203.35
1
0
Rural
0
Web
No E-Mail
0
0
0.0
6
30
2
3) $200 - $350
237.53
0
1
Surburban
0
Phone
Womens E-Mail
0
0
0.0
1
32
6
2) $100 - $200
128.01
0
1
Urban
0
Web
Mens E-Mail
0
0
0.0
3
34
3
1) $0 - $100
29.99
1
0
Rural
0
Web
Womens E-Mail
0
0
0.0
4
35
4
3) $200 - $350
218.72
0
1
Urban
0
Multichannel
Womens E-Mail
0
0
0.0
1
41
3
1) $0 - $100
99.23
1
0
Rural
0
Web
Mens E-Mail
1
0
0.0
2
43
2
4) $350 - $500
492.02
1
0
Surburban
0
Phone
No E-Mail
0
0
0.0
6
44
1
1) $0 - $100
48.32
0
1
Urban
0
Web
No E-Mail
0
0
0.0
5
46
2
4) $350 - $500
391.33
1
0
Surburban
0
Web
No E-Mail
0
0
0.0
6
50
3
3) $200 - $350
203.30
0
1
Surburban
0
Web
No E-Mail
0
0
0.0
5
55
6
1) $0 - $100
42.66
1
0
Surburban
0
Web
No E-Mail
0
0
0.0
6
59
3
2) $100 - $200
143.93
0
1
Surburban
0
Phone
Womens E-Mail
0
0
0.0
1
61
2
1) $0 - $100
96.91
1
0
Surburban
0
Phone
Womens E-Mail
0
0
0.0
4
65
5
3) $200 - $350
222.07
0
1
Surburban
0
Phone
Womens E-Mail
0
0
0.0
1
70
2
3) $200 - $350
278.80
1
0
Rural
0
Web
Mens E-Mail
0
0
0.0
2
72
2
4) $350 - $500
428.74
1
0
Rural
0
Phone
Mens E-Mail
0
0
0.0
2
76
4
2) $100 - $200
194.11
1
0
Urban
0
Phone
No E-Mail
0
0
0.0
6
79
2
1) $0 - $100
29.99
0
1
Surburban
0
Phone
Mens E-Mail
0
0
0.0
3
81
2
1) $0 - $100
95.33
0
1
Surburban
0
Web
No E-Mail
1
0
0.0
5
86
2
1) $0 - $100
82.59
1
0
Surburban
0
Phone
Womens E-Mail
0
0
0.0
4
87
6
2) $100 - $200
165.77
0
1
Urban
0
Web
Womens E-Mail
1
0
0.0
1
95
3
2) $100 - $200
133.51
1
0
Urban
0
Web
Mens E-Mail
0
0
0.0
2
97
1
1) $0 - $100
47.44
0
1
Urban
0
Phone
Womens E-Mail
0
0
0.0
1
...
...
...
...
...
...
...
...
...
...
...
...
...
...
63890
2
3) $200 - $350
229.79
1
1
Urban
0
Phone
Mens E-Mail
1
0
0.0
5
63891
4
1) $0 - $100
65.23
0
1
Urban
0
Phone
Womens E-Mail
0
0
0.0
1
63893
2
3) $200 - $350
288.30
1
1
Surburban
0
Phone
Womens E-Mail
0
0
0.0
5
63901
4
2) $100 - $200
158.91
1
0
Surburban
0
Web
No E-Mail
0
0
0.0
6
63902
3
3) $200 - $350
309.39
1
1
Urban
0
Phone
Womens E-Mail
0
0
0.0
5
63904
1
1) $0 - $100
29.99
0
1
Surburban
0
Web
Womens E-Mail
1
0
0.0
1
63906
4
3) $200 - $350
311.63
0
1
Urban
0
Multichannel
No E-Mail
0
0
0.0
5
63910
6
1) $0 - $100
29.99
1
0
Surburban
0
Phone
Mens E-Mail
0
0
0.0
2
63911
5
4) $350 - $500
373.13
0
1
Urban
0
Phone
Womens E-Mail
0
0
0.0
1
63915
4
3) $200 - $350
300.90
1
0
Surburban
0
Phone
No E-Mail
0
0
0.0
6
63916
6
1) $0 - $100
34.50
1
0
Urban
0
Phone
Womens E-Mail
0
0
0.0
4
63918
3
1) $0 - $100
29.99
1
0
Surburban
0
Web
No E-Mail
0
0
0.0
6
63919
3
2) $100 - $200
149.96
1
0
Urban
0
Web
Womens E-Mail
0
0
0.0
4
63924
4
2) $100 - $200
101.40
0
1
Urban
0
Web
No E-Mail
0
0
0.0
5
63931
2
1) $0 - $100
29.99
1
0
Surburban
0
Web
No E-Mail
0
0
0.0
6
63932
1
4) $350 - $500
426.36
1
1
Surburban
0
Multichannel
Mens E-Mail
1
0
0.0
5
63935
4
4) $350 - $500
353.47
1
1
Urban
0
Multichannel
Mens E-Mail
0
0
0.0
5
63941
2
2) $100 - $200
130.96
1
0
Urban
0
Phone
Mens E-Mail
0
0
0.0
2
63949
5
1) $0 - $100
86.79
1
0
Rural
0
Phone
No E-Mail
0
0
0.0
6
63953
5
2) $100 - $200
166.24
0
1
Urban
0
Phone
No E-Mail
0
0
0.0
5
63954
2
1) $0 - $100
93.97
1
0
Urban
0
Web
No E-Mail
1
0
0.0
6
63955
1
1) $0 - $100
29.99
1
0
Surburban
0
Phone
Mens E-Mail
0
0
0.0
2
63961
4
3) $200 - $350
337.36
1
0
Urban
0
Web
Mens E-Mail
1
0
0.0
2
63966
4
2) $100 - $200
170.03
1
0
Surburban
0
Web
Womens E-Mail
0
0
0.0
4
63969
3
1) $0 - $100
67.78
0
1
Surburban
0
Web
Womens E-Mail
0
0
0.0
1
63983
2
1) $0 - $100
83.03
0
1
Urban
0
Phone
No E-Mail
0
0
0.0
5
63988
6
1) $0 - $100
32.98
1
0
Surburban
0
Web
Mens E-Mail
0
0
0.0
2
63990
6
1) $0 - $100
80.02
0
1
Surburban
0
Phone
No E-Mail
0
0
0.0
5
63993
4
4) $350 - $500
374.07
0
1
Surburban
0
Phone
Womens E-Mail
0
0
0.0
1
63999
1
4) $350 - $500
472.82
0
1
Surburban
0
Web
Mens E-Mail
0
0
0.0
3
17551 rows × 13 columns
In [35]:
# Satisfaction levels listed from worst to best; the position in this list is
# the rank each label receives when used as ordered categories.
ordered_satisfaction = [
    'Very Unhappy',
    'Unhappy',
    'Neutral',
    'Happy',
    'Very Happy',
]
In [40]:
# Ordinal encoding: map each satisfaction label to its rank in
# `ordered_satisfaction`.
df = pd.DataFrame({'satisfaction':['Mad', 'Happy', 'Unhappy', 'Neutral']})

# `astype("category", ordered=..., categories=...)` is no longer accepted by
# pandas; the categories and their order are now carried by an explicit
# CategoricalDtype.
satisfaction_dtype = pd.CategoricalDtype(categories=ordered_satisfaction, ordered=True)

# Labels that are not among the declared categories (e.g. 'Mad') become
# missing values and therefore get the code -1.
df['satisfaction'] = df['satisfaction'].astype(satisfaction_dtype).cat.codes
df.satisfaction
Out[40]:
0 -1
1 3
2 1
3 2
Name: satisfaction, dtype: int8
In [33]:
# Small nominal (unordered) feature used to demonstrate categorical encoding.
df = pd.DataFrame(dict(
    vertebrates=['Bird', 'Bird', 'Mammal', 'Fish', 'Amphibian', 'Reptile', 'Mammal'],
))
In [34]:
# Nominal encoding: one integer code per distinct label. The codes follow the
# sorted category order (Amphibian=0, Bird=1, Fish=2, Mammal=3, Reptile=4),
# not the order in which labels first appear.
df['vertebrates cata'] = df.vertebrates.astype("category").cat.codes
# Distinct labels, listed in order of first appearance.
df.vertebrates.unique()
Out[34]:
array(['Bird', 'Mammal', 'Fish', 'Amphibian', 'Reptile'], dtype=object)
In [49]:
# One-hot encode `vertebrates`: the column is replaced by one indicator column
# per label (`vertebrates_<label>`); other columns are kept unchanged.
# The dtype is pinned because pandas >= 2.0 defaults to bool indicators, while
# earlier versions produced the 0/1 unsigned integers this notebook displays.
df = pd.get_dummies(df, columns=['vertebrates'], dtype='uint8')
df
Out[49]:
vertebrates cata
vertebrates_Amphibian
vertebrates_Bird
vertebrates_Fish
vertebrates_Mammal
vertebrates_Reptile
0
1
0
1
0
0
0
1
1
0
1
0
0
0
2
3
0
0
0
1
0
3
2
0
0
1
0
0
4
0
1
0
0
0
0
5
4
0
0
0
0
1
6
3
0
0
0
1
0
In [50]:
from sklearn.feature_extraction.text import CountVectorizer
In [51]:
# Two-document toy corpus for the bag-of-words example.
corpus = [
    "Authman ran faster than Harry because he is an athlete.",
    "Authman and Harry ran faster and faster.",
]
In [54]:
# Bag-of-words vectorizer with default settings.
bow = CountVectorizer()
# Learn the vocabulary from `corpus` and count each term per document.
X = bow.fit_transform(corpus)
In [55]:
# `get_feature_names()` was deprecated in scikit-learn 1.0 and removed in 1.2.
# `get_feature_names_out()` is its replacement; it returns a NumPy array, so
# `.tolist()` restores the plain list of strings this cell displays.
bow.get_feature_names_out().tolist()
Out[55]:
['an',
'and',
'athlete',
'authman',
'because',
'faster',
'harry',
'he',
'is',
'ran',
'than']
In [56]:
# Materialise the counts as a dense array for display: one row per document,
# one column per vocabulary term (11 terms here).
X.toarray()
Out[56]:
array([[1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1],
[0, 2, 0, 1, 0, 2, 1, 0, 0, 1, 0]], dtype=int64)
In [ ]:
Content source: jeffmkw/DAT210x-Lab
Similar notebooks: